import scrapy

from scrapytest.items import DoubanItem


class DoubanSpider(scrapy.Spider):
    """Spider that scrapes the movie list starting at movie.douban.com/top250.

    For every ``<li>`` entry of the list, ``parse`` yields one ``DoubanItem``.
    It then follows the "next page" link, if one is found, using the same
    callback.
    """

    name = "douban"
    # allowed_domains = ["movie.douban.com"]  # left disabled, as before
    start_urls = ["https://movie.douban.com/top250"]
    # 1-based sequence number of the next movie to be emitted. Kept as a class
    # attribute so the numbering continues across successive parse() calls.
    movieN = 1

    @staticmethod
    def _first(values, default=""):
        """Return the first extracted value, or *default* if nothing matched."""
        return values[0] if values else default

    def parse(self, response):
        """Extract every movie on the page, then follow the next-page link.

        Args:
            response: The response for one list page.

        Yields:
            One ``DoubanItem`` per movie entry, followed by a request for the
            next page when a next-page href is present.
        """
        movie_nodes = response.xpath("/html/body/div[3]/div[1]/div/div[1]/ol/li")

        for node in movie_nodes:
            # Movie title (possibly split over several text nodes).
            name_parts = node.xpath(".//div/div[2]/div[1]/a//text()").getall()

            # People who made the movie.
            makers_raw = node.xpath("./div/div[2]/div[2]/p[1]/text()[1]").getall()

            # Year, country and type, separated by "/".
            base_info_raw = node.xpath("./div/div[2]/div[2]/p[1]/text()[2]").getall()

            # Rating block: class attribute of the first span, text of the
            # second span, and text of the fourth span.
            star_raw = node.xpath("./div/div[2]/div[2]/div/span[1]/@class").getall()
            score_raw = node.xpath("./div/div[2]/div[2]/div/span[2]/text()").getall()
            comments_raw = node.xpath("./div/div[2]/div[2]/div/span[4]/text()").getall()

            # Short description; may be missing for an entry.
            desc_raw = node.xpath("./div/div[2]/div[2]/p[2]/span/text()").getall()

            # Title: strip newlines / non-breaking spaces and glue the parts.
            nameNew = "".join(
                part.replace("\n", "").replace("\xa0", "").strip()
                for part in name_parts
            )

            # Director / cast line. Missing nodes fall back to "" instead of
            # raising IndexError and aborting the rest of the page.
            movieMakers = (
                self._first(makers_raw).strip().replace("\n", "").replace("/...", "")
            )

            # Basic info: the first three "/"-separated fields are year,
            # country and type. Pad with "" when fewer fields are present.
            base_fields = [
                field.strip()
                for field in self._first(base_info_raw)
                .strip()
                .replace("\n", "")
                .replace("\t", "")
                .split("/")
            ]
            base_fields += [""] * (3 - len(base_fields))
            Myear, Mcountry, Mtype = base_fields[:3]

            # Remaining single-value fields.
            star = self._first(star_raw)
            score = self._first(score_raw)
            comments = self._first(comments_raw)
            desc = self._first(desc_raw)

            # Use one id for both the printed record and the item, and only
            # advance the counter afterwards, so the two always agree.
            movie_id = str(DoubanSpider.movieN)
            DoubanSpider.movieN = DoubanSpider.movieN + 1

            # Print all fields as one "^"-separated record.
            movieInfoList = [
                movie_id, nameNew, movieMakers, Myear, Mcountry, Mtype,
                star, score, comments, desc,
            ]
            print("^".join(movieInfoList))

            # Store the fields in the item container (used like a dict).
            doubanItems = DoubanItem()
            doubanItems['Mid'] = movie_id
            doubanItems['name'] = nameNew
            doubanItems['movieMakers'] = movieMakers
            doubanItems['Myear'] = Myear
            doubanItems['Mcountry'] = Mcountry
            doubanItems['Mtype'] = Mtype
            doubanItems['star'] = star
            doubanItems['score'] = score
            doubanItems['comments'] = comments
            doubanItems['desc'] = desc
            yield doubanItems

        nextPage = response.xpath(
            '/html/body/div[3]/div[1]/div/div[1]/div[2]/span[3]/a/@href'
        ).get()
        if nextPage:
            # response.follow resolves a relative href against response.url,
            # so the start URL does not need to be prepended by hand.
            yield response.follow(nextPage, callback=self.parse)
